#--
# This file is part of uhferret.
#
# Author::    Peter Lane
# Copyright:: Copyright 2012, Peter Lane.
# License::   GPLv3
#
# uhferret is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# uhferret is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with uhferret.  If not, see <http://www.gnu.org/licenses/>.

require 'find'
require 'shellwords'
require 'uhferret'
require 'utils'
require 'webrick'
include WEBrick

# Counters used by the servlets below to give every upload folder and every
# report file a distinct name.  Both start at 0 each time this file is loaded.
# They are class variables assigned at the top level of the file, not true
# globals ($name).
# NOTE(review): recent Ruby versions are believed to reject class-variable
# access at top level -- confirm against the Ruby version this server targets.
@@next_upload = 0  # number of uploads so far; the next folder is "Upload<n>"
@@next_report = 0  # number of reports so far; the next file is "report<n>.xml"

module UHFerret

  # Displays a welcome page, providing a field to upload the zipped file.
  # On pressing 'submit', runs Ferret and passes results to FerretResultsServlet.
  class FerretHomeServlet < HTTPServlet::AbstractServlet

    # Serves the Ferret welcome page: an upload form followed by instructions.
    #
    # The list of accepted archive extensions is taken from
    # Utils::CompressedFileExtensions, and the word-processor / pdf entries
    # are only listed when Utils.command_present? reports the matching
    # converter command ("abiword" / "pdftotext").
    #
    # req:: the request (not used).
    # res:: the response; its Content-Type and body are set here.
    def do_GET(req, res)
      res['Content-Type'] = "text/html"

      # Work out the variable parts of the page first, then drop them into
      # the template below.
      archive_forms = Utils::CompressedFileExtensions.map {|ext| "<tt>.#{ext}</tt>"}.join(", ")
      word_processed_item = Utils.command_present?("abiword") ? "<li>word-processed files (such as <tt>doc</tt> or <tt>rtf</tt> files)</li>" : ""
      pdf_item = Utils.command_present?("pdftotext") ? "<li><tt>pdf</tt> documents</li>" : ""
      generated_on = Time.now

      res.body = <<BODY
<html><body><h1>Ferret Server</h1>
    <p>
    Ferret is a tool for detecting copying in groups of documents, 
    and was created by the (now defunct) Plagiarism Detection Group, 
    University of Hertfordshire.
    </p>
    <form method="POST" enctype="multipart/form-data">
    <p>Compressed file: <input type="file" name="data" size="40">
                <p><input type="submit"/>
    </p>
    </form>

<hr><h2>Instructions for use</h2>
<p>
<ol>
    <li>Construct a compressed folder of your files in a way suitable for 
    your own computer.  The Ferret Server will handle a compressed 
    folder in one of the following forms: 
    #{archive_forms}.
    The files within it may be as:
    <ul>
    <li>plain text files</li>
    #{word_processed_item}
    #{pdf_item}
    </ul>
    Files may contain natural language text or computer programs (C-type 
    languages).
    </li>
    <li>Use the 'Browse' button to select your compressed file.
    </li>
    <li>Once Ferret has finished analysing the documents, the display will show 
    a table of the top 100 results.
    </li>
    <li>Click on the 'view' link beside each pair to see a report of 
    the comparisons found in that pair of documents.  Use the print option of 
    your browser to preserve a copy (e.g. using 'print to pdf').
    </li>
</ol>
</p>
<hr><font size=-1>Ferret home page generated on: #{generated_on}<br />.
</font>
</body>
</html>
BODY
    end

    # Convenience method to check if a string ends with given ending.
    #
    # Delegates to String#end_with?, so an empty _str_end_ counts as a match
    # for every string.
    #
    # str::     the string to examine.
    # str_end:: the ending to look for.
    # Returns true if _str_ ends with _str_end_, false otherwise.
    def endsWith?(str, str_end)
      str.end_with?(str_end)
    end

    # Checks if given _filename_ is an example of a compressed file, i.e. its
    # name ends in "." followed by one of Utils::CompressedFileExtensions.
    #
    # The "." is required so that a name which merely ends in the same letters
    # (e.g. "magazip") is not mistaken for an archive.  Extensions are accepted
    # with or without their own leading ".".
    #
    # Returns true or false.
    def isCompressedFile? filename
      Utils::CompressedFileExtensions.any? do |ext|
        ext = ext.to_s
        suffix = ext.start_with?(".") ? ext : ".#{ext}"
        filename.end_with?(suffix)
      end
    end

    # If _filename_ names a known compressed file format (rar, tbz2/tar.bz2,
    # tgz/tar.gz or zip) and Utils.command_present? reports the matching tool,
    # the file is unpacked into the current directory.  The file is then
    # deleted whether or not it was unpacked.
    #
    # The extraction tool is started with an explicit argument list rather
    # than through a shell, so characters in the (user-supplied) file name are
    # never interpreted as shell syntax.
    #
    # Returns the result of File.delete.
    def decompress_file filename
      # A name starting with '-' would be read as an option by the tools.
      archive = filename.start_with?("-") ? "./#{filename}" : filename

      command =
        if filename.end_with?("rar")
          ["unrar", "x", archive]
        elsif filename.end_with?("tbz2", "tar.bz2")
          ["tar", "jxf", archive]
        elsif filename.end_with?("tgz", "tar.gz")
          ["tar", "zxf", archive]
        elsif filename.end_with?("zip")
          ["unzip", archive]
        end

      if command && Utils.command_present?(command.first)
        # The tool's standard output is not wanted, so it is discarded.
        system(*command, out: File::NULL)
      end

      File.delete filename # remove the compressed folder
    end

    # Lists every regular file found under _folder_ (searched recursively) and
    # writes those accepted by Utils.valid_document? to
    # "ferret-file-definitions.def", one name per line.
    #
    # Note: changes the working directory to _folder_, so the definitions
    # file is created there.
    #
    # Returns false if Utils.is_code? is true for any file found, and true
    # otherwise (i.e. the files are to be treated as text documents).
    def create_file_definitions folder
      Dir.chdir folder

      found_code = false
      documents = Find.find(folder).select do |path|
        next false unless File.file?(path) # directories are not listed
        found_code = true if Utils.is_code?(path)
        true
      end

      # only valid documents are named in the definitions file
      File.open("ferret-file-definitions.def", "w") do |definitions|
        documents.each { |path| definitions.puts(path) if Utils.valid_document?(path) }
      end

      !found_code
    end

    # Handles the upload form; triggered when the user clicks on 'submit'.
    #
    # The uploaded file is saved into a fresh "Upload<n>" folder under $base.
    # If its name has a recognised compressed-file extension it is unpacked, a
    # definitions file is built, Ferret is run with its output redirected to
    # results.html, and the browser is redirected to that page.  If nothing
    # usable was uploaded, an error page is returned instead.
    #
    # Note: this changes the process working directory to the upload folder.
    #
    # req:: the request; the upload is read from the "data" form field.
    # res:: the response; its Content-Type and body are set here.
    def do_POST(req, res)
      res['Content-Type'] = "text/html"
      error_page = %{<html><body><h1>Error</h1> 
      <p>You did not submit a valid zip file.</p>
      <p><a href="/ferret/home">Return to Ferret home page</a>.</p>
      </body></html>}

      upload_data = req.query["data"]
      # The client-supplied name is untrusted and may be a full path: keep
      # only its last component so the file is always written inside the
      # upload folder.
      filename = upload_data.nil? ? "" : upload_data.filename.to_s.split(%r{[/\\]}).last.to_s
      filename = filename.gsub(' ', "-") # replace spaces
      if filename.empty? || filename == "." || filename == ".."
        res.body = error_page # nothing usable was uploaded
        return
      end

      Dir.mkdir $base unless File.exist? $base
      begin
        upload_dir = "Upload#{@@next_upload}" # create a unique folder for user's files
        @@next_upload += 1
        Dir.mkdir "#{$base}/#{upload_dir}"
      rescue Errno::EEXIST
        # The counter starts again from 0 when the file is loaded, so folders
        # from an earlier run may already exist: move on to the next number.
        retry
      end

      uploaded_file = "#{$base}/#{upload_dir}/#{filename}"
      File.open(uploaded_file, "wb") do |file| # do the actual upload of the data
        upload_data.each_data do |data|
          file << data.to_s
        end
      end

      unless isCompressedFile?(uploaded_file)
        res.body = error_page
        return
      end

      # uploaded file is a compressed file: decompress and compute similarities
      Dir.chdir "#{$base}/#{upload_dir}"
      decompress_file File.basename(uploaded_file)
      is_text = create_file_definitions Dir.pwd

      # do the computation of similarities
      # -- output to html table with given folder name, using file definition list
      `#{FERRET} #{is_text ? "-t" : "-c"} -w -f ferret-file-definitions.def > results.html`
      res.body = "<meta HTTP-EQUIV=\"REFRESH\" content=\"0; url=#{$base}/#{upload_dir}/results.html\">"
    end
  end

  # This servlet is triggered by a click on the 'Download' link in the report table.
  # It creates the XML report comparing two documents.
  class FerretReportServlet < HTTPServlet::AbstractServlet

    # Handles the request to create a report in xml format.
    #
    # Reads the 'upload', 'file1' and 'file2' query parameters, changes into
    # the upload folder, runs Ferret to write "report<n>.xml" comparing the two
    # files, writes the accompanying stylesheet next to it and redirects the
    # browser to the report.
    #
    # The report and file names come from the request, so they are
    # shell-escaped before being placed on the Ferret command line.
    # NOTE(review): 'upload' is used as a directory path without validation --
    # confirm it can only name a folder created by the upload servlet.
    #
    # Raises WEBrick::HTTPStatus::BadRequest if a parameter is missing.
    def do_GET(req, res)
      upload_dir = req.query['upload']
      file1 = req.query['file1']
      file2 = req.query['file2']
      if upload_dir.nil? || file1.nil? || file2.nil?
        raise WEBrick::HTTPStatus::BadRequest, "the upload, file1 and file2 parameters are required"
      end

      file1 = file1.gsub("%20", " ")
      file2 = file2.gsub("%20", " ")
      report_name = "#{upload_dir}/report#{@@next_report}.xml"
      @@next_report += 1
      Dir.chdir "#{upload_dir}"

      mode = Utils.is_code?(file1) ? "-c" : "-t"
      arguments = [report_name, file1, file2].map { |arg| Shellwords.escape(arg) }
      `#{FERRET} #{mode} -x #{arguments.join(" ")}`
      write_style_sheet File.dirname(report_name)
      res['Content-Type'] = "text/html"
      res.body = "<meta HTTP-EQUIV=\"REFRESH\" content=\"0; url=#{report_name}\">"
    end

    private
    # Writes the XSL stylesheet "uhferret.xsl" into _dir_, replacing any
    # existing file of that name.
    #
    # The stylesheet renders a report's summary figures, then each document's
    # text block by block: blocks marked 'copied' use the "highlight" CSS
    # class and blocks marked 'normal' use the "normal" CSS class.
    #
    # dir:: folder in which to create the stylesheet.
    def write_style_sheet dir
      File.open("#{dir}/uhferret.xsl", "w") do |f|
        f.puts <<STYLESHEET
<?xml version="1.0" encoding="ISO-8859-1"?>

<html xsl:version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <style> <!-- style sheet for document -->
      h1 {background-color: #d0d0d0} <!-- add a background to make headings stand out -->
      h2 {background-color: #d0d0d0}
      .highlight {font-weight:bold; color:blue}  <!-- highlighted text style -->
      .normal {font-weight:normal}               <!-- normal text style      -->
    </style> 
  </head>
  <body>
    <h1>UH-Ferret: Document comparison</h1>

    <!-- display top-level information -->
    <p>Common trigrams: <xsl:value-of select="uhferret/common-trigrams"/></p>
    <p>Similarity: <xsl:value-of select="uhferret/similarity"/></p>

    <!-- work through each document -->
    <xsl:for-each select="uhferret/document">
      <!-- display document-level information -->
      <h2>Document: <xsl:value-of select="source"/></h2>
      <p>Number of trigrams: <xsl:value-of select="num-trigrams"/></p>
      <p>Containment in other document: <xsl:value-of select="containment"/></p>

      <!-- work through each block in text, displaying as highlighted or normal -->
      <pre>
      <xsl:for-each select="text/block">
        <xsl:if test="@text='copied'">
          <span class="highlight"><xsl:value-of select="."/></span>
        </xsl:if>
        <xsl:if test="@text='normal'">
          <span class="normal"><xsl:value-of select="."/></span>
        </xsl:if>
      </xsl:for-each>
      </pre>
    </xsl:for-each>
  </body>
</html>
STYLESHEET
      end
    end
  end

end
